In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
Explore the data and distributions¶
Load data and convert to appropriate types
In [3]:
# Load the raw site readings. convert_dtypes() switches the columns to pandas'
# nullable dtypes, and the header names are stripped of surrounding whitespace
# right away so that every later cell can address columns by their clean names.
site_data = (
    pd.read_csv('data/SiteData.csv')
    .convert_dtypes()
    .rename(columns=str.strip)
)
# DataFrame.info() prints its summary and returns None, so wrapping it in
# display() only adds a stray "None" line to the cell output.
site_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 13221 entries, 0 to 13220 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 TIMESTAMP 13221 non-null string 1 READING 13221 non-null string 2 VALUE 13221 non-null Float64 dtypes: Float64(1), string(2) memory usage: 322.9 KB
None
Change timestamp to datetime¶
In [4]:
# Parse the TIMESTAMP strings into datetime values (needed for the .dt accessor
# and the time-axis plots further down), then re-check the column dtypes.
site_data = site_data.assign(TIMESTAMP=pd.to_datetime(site_data['TIMESTAMP']))
site_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 13221 entries, 0 to 13220 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 TIMESTAMP 13221 non-null datetime64[ns] 1 READING 13221 non-null string 2 VALUE 13221 non-null Float64 dtypes: Float64(1), datetime64[ns](1), string(1) memory usage: 322.9 KB
In [5]:
# Preview the first five rows of the long-format readings table.
site_data.head(n=5)
Out[5]:
| | TIMESTAMP | READING | VALUE |
|---|---|---|---|
| 0 | 2020-08-24 00:00:00 | PX_GENERATOR_POWER | 0.0 |
| 1 | 2020-08-24 00:00:00 | PX_RECTIFIER_LOAD | 2.3867 |
| 2 | 2020-08-24 00:00:00 | PX_GENERATOR_1_FUEL_LEVEL | 400.43 |
| 3 | 2020-08-24 00:30:00 | PX_GENERATOR_POWER | 0.0 |
| 4 | 2020-08-24 00:30:00 | PX_RECTIFIER_LOAD | 2.4072 |
There are stray spaces in the column names; strip them¶
In [6]:
# Remove leading/trailing whitespace from every column label.
site_data.columns = [label.strip() for label in site_data.columns]
In [7]:
# Which calendar months does the data set cover?
pd.unique(site_data['TIMESTAMP'].dt.month)
Out[7]:
array([ 8, 9, 10, 11], dtype=int32)
In [8]:
# List the distinct reading types present in the READING column.
pd.unique(site_data['READING'])
Out[8]:
<StringArray> ['PX_GENERATOR_POWER', 'PX_RECTIFIER_LOAD', 'PX_GENERATOR_1_FUEL_LEVEL'] Length: 3, dtype: string
In [9]:
# Box plot of VALUE for each READING type on a log-scaled y axis.
# The frame and column names are passed (instead of bare Series) so plotly can
# title the legend from the READING column rather than the generic argument
# name "color"; the `labels` mapping is therefore keyed by column name.
# NOTE(review): zero values (e.g. the 0.0 PX_GENERATOR_POWER rows) cannot be
# placed on a log axis — confirm that leaving them out of the plot is acceptable.
fig = px.box(
    site_data,
    x='READING',
    y='VALUE',
    color='READING',
    log_y=True,
    title='Statistics of each reading',
    labels={'VALUE': 'value', 'READING': 'reading'},
)
fig.show()
In [10]:
# Keep only the generator power rows; the original row labels are retained.
generator_power = site_data.loc[site_data['READING'].eq('PX_GENERATOR_POWER')]
In [11]:
# Histogram of the generator power values with counts on a log axis.
# A title is set so the figure can be identified on its own, and the column is
# named explicitly so no generic "variable" legend is drawn.
px.histogram(
    generator_power,
    x='VALUE',
    log_y=True,
    title='Distribution of PX_GENERATOR_POWER values',
)
In [12]:
# Keep only the rectifier load rows; the original row labels are retained.
rectifier_load = site_data.loc[site_data['READING'].eq('PX_RECTIFIER_LOAD')]
In [13]:
# Histogram of the rectifier load values with counts on a log axis.
# A title is set so the figure can be identified on its own, and the column is
# named explicitly so no generic "variable" legend is drawn.
px.histogram(
    rectifier_load,
    x='VALUE',
    log_y=True,
    title='Distribution of PX_RECTIFIER_LOAD values',
)
In [14]:
# Keep only the generator fuel level rows; the original row labels are retained.
fuel_level = site_data.loc[site_data['READING'].eq('PX_GENERATOR_1_FUEL_LEVEL')]
In [15]:
# Histogram of the fuel level values (linear count axis).
# A title is set so the figure can be identified on its own, and the column is
# named explicitly so no generic "variable" legend is drawn.
px.histogram(
    fuel_level,
    x='VALUE',
    title='Distribution of PX_GENERATOR_1_FUEL_LEVEL values',
)
In [16]:
# Show the rectifier load rows whose VALUE is below 0.6.
rectifier_load.loc[rectifier_load['VALUE'].lt(0.6)]
Out[16]:
| | TIMESTAMP | READING | VALUE |
|---|---|---|---|
| 6496 | 2020-10-08 07:00:00 | PX_RECTIFIER_LOAD | 0.4999 |
| 6499 | 2020-10-08 07:30:00 | PX_RECTIFIER_LOAD | 0.5661 |
In [17]:
# Time series of every reading type, one subplot per reading, stacked on a
# shared time axis.
reading_names = ['PX_GENERATOR_POWER',
                 'PX_RECTIFIER_LOAD',
                 'PX_GENERATOR_1_FUEL_LEVEL']

fig = make_subplots(rows=len(reading_names), cols=1, shared_xaxes=True,
                    subplot_titles=reading_names)

# Add one trace per reading; subplot rows are 1-based.
for row, reading in enumerate(reading_names, start=1):
    reading_rows = site_data[site_data['READING'] == reading]
    fig.add_trace(
        go.Scatter(x=reading_rows['TIMESTAMP'], y=reading_rows['VALUE'],
                   mode='lines+markers', name=reading),
        row=row, col=1,
    )
    # Layout-level `yaxis_title` only reaches the first subplot, so the y axis
    # is labelled row by row instead.
    fig.update_yaxes(title_text='Value', row=row, col=1)

# With a shared x axis the title belongs under the bottom subplot; layout-level
# `xaxis_title` would attach it to the top one.
fig.update_xaxes(title_text='Timestamp', row=len(reading_names), col=1)

fig.update_layout(
    title='Time Series for Each READING',
    height=900,
    xaxis_rangeslider_visible=False,
)
fig.show()
In [18]:
# Histogram of the values of every reading type, one subplot per reading,
# each with its own x axis.
histogram_readings = ['PX_GENERATOR_POWER',
                      'PX_RECTIFIER_LOAD',
                      'PX_GENERATOR_1_FUEL_LEVEL']

fig = make_subplots(rows=3, cols=1, shared_xaxes=False,
                    subplot_titles=histogram_readings)

# One histogram trace per reading, with both axes labelled on every row.
for row, reading in enumerate(histogram_readings, start=1):
    data = site_data[site_data['READING'] == reading]
    fig.add_trace(go.Histogram(x=data['VALUE'], name=reading, opacity=0.75),
                  row=row, col=1)
    fig.update_xaxes(title_text="Value", row=row, col=1)
    fig.update_yaxes(title_text="Count", row=row, col=1)

# Figure-wide settings; only the first two subplots get a log-scaled count
# axis, the third stays linear.
fig.update_layout(
    title='Histogram for Each READING',
    height=900,
    bargap=0.2,
    bargroupgap=0.1,
    yaxis_type='log',
    yaxis2_type='log',
)
fig.show()
In [ ]: